Simple Metropolis-Hastings

Chapter 2


In [2]:
import matplotlib.pyplot as plt
import numpy as np
from scipy import stats
import seaborn as sns
import tqdm

%matplotlib inline
sns.set(font_scale=1.5)

In [14]:
def metropolis(func, steps=10000):
    """A very simple Metropolis implementation."""
    samples = np.zeros(steps)
    # start at the distribution's mean, an arbitrary but convenient choice
    old_x = func.mean()
    # get the probability density at this point
    old_prob = func.pdf(old_x)
    for i in range(steps):
        # take a small random step in some direction
        new_x = old_x + np.random.normal(0, 0.5)
        # get the probability density at the proposed point
        new_prob = func.pdf(new_x)
        # compute the acceptance ratio
        acceptance = new_prob / old_prob
        # accept if the ratio beats a uniform random number:
        # the better the new point, the higher its chance of being accepted
        if acceptance >= np.random.random():
            # keep the new point and make it the current point
            samples[i] = new_x
            old_x = new_x
            old_prob = new_prob
        else:
            # if rejected, record the old point again
            samples[i] = old_x
    return samples
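
The acceptance rule above is the standard Metropolis one: a proposal $x'$ is accepted with probability

$$\alpha = \min\left(1, \frac{p(x')}{p(x)}\right)$$

Drawing $u \sim U(0, 1)$ and accepting whenever $p(x')/p(x) \ge u$ implements exactly this, since ratios above 1 always beat $u$.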

In [21]:
np.random.seed(2)


func = stats.beta(0.4, 2)
samples = metropolis(func=func)
x = np.linspace(0.01, .99, 100)
y = func.pdf(x)
plt.xlim(0, 1)
plt.plot(x, y, 'r-', lw=3, label='True distribution')
plt.hist(samples, bins=32, density=True, label='Estimated distribution')
plt.xlabel('$x$', fontsize=14)
plt.ylabel('$pdf(x)$', fontsize=14)
plt.legend(fontsize=14)


Out[21]:
<matplotlib.legend.Legend at 0x7ff9b876bbd0>

In [22]:
np.random.seed(2)

func = stats.norm(10, 2)
samples = metropolis(func=func)
x = np.linspace(0.01, 25, 1000)
y = func.pdf(x)
# plt.xlim(0, 1)
plt.plot(x, y, 'r-', lw=3, label='True distribution')
plt.hist(samples, bins=32, density=True, label='Estimated distribution')
plt.xlabel('$x$', fontsize=14)
plt.ylabel('$pdf(x)$', fontsize=14)
plt.legend(fontsize=14)


Out[22]:
<matplotlib.legend.Legend at 0x7ff9d88278d0>

In [30]:
def metropolis_extra(func, steps=10000):
    """Metropolis as above, but also recording the acceptance ratio
    and the uniform draw used at each step."""
    samples = np.zeros(steps)
    accept = np.zeros(steps)
    accept2 = np.zeros(steps)

    # start at the distribution's mean, an arbitrary but convenient choice
    old_x = func.mean()
    # get the probability density at this point
    old_prob = func.pdf(old_x)
    for i in range(steps):
        # take a small random step in some direction
        new_x = old_x + np.random.normal(0, 0.5)
        # get the probability density at the proposed point
        new_prob = func.pdf(new_x)
        # compute the acceptance ratio
        acceptance = new_prob / old_prob
        accept[i] = acceptance
        # accept if the ratio beats a uniform random number:
        # the better the new point, the higher its chance of being accepted
        rnd = np.random.random()
        accept2[i] = rnd
        if acceptance >= rnd:
            # keep the new point and make it the current point
            samples[i] = new_x
            old_x = new_x
            old_prob = new_prob
        else:
            # if rejected, record the old point again
            samples[i] = old_x
    return samples, accept, accept2

In [45]:
np.random.seed(2)

func = stats.norm(10, 2)
samples, accept, accept2 = metropolis_extra(func=func)
x = np.linspace(0.01, 25, 1000)
y = func.pdf(x)
# plt.xlim(0, 1)
plt.plot(x, y, 'r-', lw=3, label='True distribution')
plt.hist(samples, bins=32, density=True, label='Estimated distribution')
plt.xlabel('$x$', fontsize=14)
plt.ylabel('$pdf(x)$', fontsize=14)
plt.legend(fontsize=14)

plt.figure()
# acceptance ratio vs. the uniform draw it was compared against
plt.plot(accept, accept2, '.')


plt.figure()
# distribution of the acceptance ratios themselves
plt.hist(accept, bins=20, density=True)

plt.figure()
# accept/reject indicator (1 = accepted, 0 = rejected)
plt.hist((accept > accept2).astype(int), bins=[-1, 0, .5, 1, 1.5], density=True)


Out[45]:
(array([0.    , 0.1556, 0.    , 1.8444]),
 array([-1. ,  0. ,  0.5,  1. ,  1.5]),
 <a list of 4 Patch objects>)

In [44]:
(accept>accept2)


Out[44]:
array([ True,  True,  True, ...,  True,  True,  True])
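
The overall acceptance rate is just the mean of this boolean array:

(accept > accept2).mean()

From the indicator histogram above (density 1.8444 over a bin of width 0.5) it comes out to about 0.92 here. That is quite high, consistent with a proposal width of 0.5 being small relative to the target's standard deviation of 2: small steps are rarely rejected, but they explore the distribution slowly.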

Next, following https://twiecki.io/blog/2015/11/10/mcmc-sampling/, we build and test a fuller Metropolis sampler for Bayesian inference of a normal mean.


In [1]:
%matplotlib inline

import numpy as np
import scipy as sp
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from scipy.stats import norm

sns.set_style('white')
sns.set_context('talk')

np.random.seed(123)

In [2]:
data = np.random.randn(20)

In [4]:
ax = plt.subplot()
sns.distplot(data, kde=False, ax=ax)
_ = ax.set(title='Histogram of observed data', xlabel='x', ylabel='# observations');



In [5]:
def calc_posterior_analytical(data, x, mu_0, sigma_0):
    """Analytical posterior of mu for a normal likelihood with known
    sigma = 1 and a conjugate normal prior mu ~ N(mu_0, sigma_0**2)."""
    sigma = 1.
    n = len(data)
    mu_post = (mu_0 / sigma_0**2 + data.sum() / sigma**2) / (1. / sigma_0**2 + n / sigma**2)
    sigma_post = (1. / sigma_0**2 + n / sigma**2)**-1
    return norm(mu_post, np.sqrt(sigma_post)).pdf(x)

ax = plt.subplot()
x = np.linspace(-1, 1, 500)
posterior_analytical = calc_posterior_analytical(data, x, 0., 1.)
ax.plot(x, posterior_analytical)
ax.set(xlabel='mu', ylabel='belief', title='Analytical posterior');
sns.despine()
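
This is the standard conjugate-normal update: with known likelihood scale $\sigma$ and prior $\mu \sim N(\mu_0, \sigma_0^2)$, the posterior over $\mu$ is normal with

$$\mu_{\text{post}} = \frac{\mu_0/\sigma_0^2 + \sum_i x_i/\sigma^2}{1/\sigma_0^2 + n/\sigma^2}, \qquad \sigma_{\text{post}}^2 = \left(\frac{1}{\sigma_0^2} + \frac{n}{\sigma^2}\right)^{-1},$$

which is exactly what `calc_posterior_analytical` evaluates.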



In [8]:
def sampler(data, samples=4, mu_init=.5, proposal_width=.5, plot=False, mu_prior_mu=0, mu_prior_sd=1.):
    mu_current = mu_init
    posterior = [mu_current]
    for i in range(samples):
        # suggest new position
        mu_proposal = norm(mu_current, proposal_width).rvs()

        # Compute likelihood by multiplying probabilities of each data point
        likelihood_current = norm(mu_current, 1).pdf(data).prod()
        likelihood_proposal = norm(mu_proposal, 1).pdf(data).prod()
        
        # Compute prior probability of current and proposed mu        
        prior_current = norm(mu_prior_mu, mu_prior_sd).pdf(mu_current)
        prior_proposal = norm(mu_prior_mu, mu_prior_sd).pdf(mu_proposal)
        
        p_current = likelihood_current * prior_current
        p_proposal = likelihood_proposal * prior_proposal
        
        # Accept proposal?
        p_accept = p_proposal / p_current
        
        # The prior is already included above; what is omitted is the
        # Metropolis-Hastings correction for the proposal density, which
        # cancels here because the normal proposal is symmetric
        accept = np.random.rand() < p_accept
        
        if plot:
            plot_proposal(mu_current, mu_proposal, mu_prior_mu, mu_prior_sd, data, accept, posterior, i)
        
        if accept:
            # Update position
            mu_current = mu_proposal
        
        posterior.append(mu_current)
        
    return np.array(posterior)

# Helper to visualize a single proposal step
def plot_proposal(mu_current, mu_proposal, mu_prior_mu, mu_prior_sd, data, accepted, trace, i):
    from copy import copy
    trace = copy(trace)
    fig, (ax1, ax2, ax3, ax4) = plt.subplots(ncols=4, figsize=(16, 4))
    fig.suptitle('Iteration %i' % (i + 1))
    x = np.linspace(-3, 3, 5000)
    color = 'g' if accepted else 'r'
        
    # Plot prior
    prior_current = norm(mu_prior_mu, mu_prior_sd).pdf(mu_current)
    prior_proposal = norm(mu_prior_mu, mu_prior_sd).pdf(mu_proposal)
    prior = norm(mu_prior_mu, mu_prior_sd).pdf(x)
    ax1.plot(x, prior)
    ax1.plot([mu_current] * 2, [0, prior_current], marker='o', color='b')
    ax1.plot([mu_proposal] * 2, [0, prior_proposal], marker='o', color=color)
    ax1.annotate("", xy=(mu_proposal, 0.2), xytext=(mu_current, 0.2),
                 arrowprops=dict(arrowstyle="->", lw=2.))
    ax1.set(ylabel='Probability Density', title='current: prior(mu=%.2f) = %.2f\nproposal: prior(mu=%.2f) = %.2f' % (mu_current, prior_current, mu_proposal, prior_proposal))
    
    # Likelihood
    likelihood_current = norm(mu_current, 1).pdf(data).prod()
    likelihood_proposal = norm(mu_proposal, 1).pdf(data).prod()
    y = norm(loc=mu_proposal, scale=1).pdf(x)
    sns.distplot(data, kde=False, norm_hist=True, ax=ax2)
    ax2.plot(x, y, color=color)
    ax2.axvline(mu_current, color='b', linestyle='--', label='mu_current')
    ax2.axvline(mu_proposal, color=color, linestyle='--', label='mu_proposal')
    #ax2.title('Proposal {}'.format('accepted' if accepted else 'rejected'))
    ax2.annotate("", xy=(mu_proposal, 0.2), xytext=(mu_current, 0.2),
                 arrowprops=dict(arrowstyle="->", lw=2.))
    # the likelihood (a product of 20 densities) is tiny, so scale by 1e14 for display
    ax2.set(title='likelihood(mu=%.2f) = %.2f\nlikelihood(mu=%.2f) = %.2f' % (mu_current, 1e14*likelihood_current, mu_proposal, 1e14*likelihood_proposal))
    
    # Posterior
    posterior_analytical = calc_posterior_analytical(data, x, mu_prior_mu, mu_prior_sd)
    ax3.plot(x, posterior_analytical)
    posterior_current = calc_posterior_analytical(data, mu_current, mu_prior_mu, mu_prior_sd)
    posterior_proposal = calc_posterior_analytical(data, mu_proposal, mu_prior_mu, mu_prior_sd)
    ax3.plot([mu_current] * 2, [0, posterior_current], marker='o', color='b')
    ax3.plot([mu_proposal] * 2, [0, posterior_proposal], marker='o', color=color)
    ax3.annotate("", xy=(mu_proposal, 0.2), xytext=(mu_current, 0.2),
                 arrowprops=dict(arrowstyle="->", lw=2.))
    #x3.set(title=r'prior x likelihood $\propto$ posterior')
    ax3.set(title='posterior(mu=%.2f) = %.5f\nposterior(mu=%.2f) = %.5f' % (mu_current, posterior_current, mu_proposal, posterior_proposal))
    
    if accepted:
        trace.append(mu_proposal)
    else:
        trace.append(mu_current)
    ax4.plot(trace)
    ax4.set(xlabel='iteration', ylabel='mu', title='trace')
    plt.tight_layout()
    #plt.legend()

In [9]:
np.random.seed(123)
sampler(data, samples=8, mu_init=-1., plot=True);



In [10]:
posterior = sampler(data, samples=15000, mu_init=1.)
fig, ax = plt.subplots()
ax.plot(posterior)
_ = ax.set(xlabel='sample', ylabel='mu');



In [11]:
ax = plt.subplot()

sns.distplot(posterior[500:], ax=ax, label='estimated posterior')
x = np.linspace(-.5, .5, 500)
post = calc_posterior_analytical(data, x, 0, 1)
ax.plot(x, post, 'g', label='analytic posterior')
_ = ax.set(xlabel='mu', ylabel='belief');
ax.legend();



In [12]:
posterior_small = sampler(data, samples=5000, mu_init=1., proposal_width=.01)
fig, ax = plt.subplots()
ax.plot(posterior_small);
_ = ax.set(xlabel='sample', ylabel='mu');



In [13]:
posterior_large = sampler(data, samples=5000, mu_init=1., proposal_width=3.)
fig, ax = plt.subplots()
ax.plot(posterior_large)
_ = ax.set(xlabel='sample', ylabel='mu');



In [14]:
sns.distplot(posterior_small[1000:], label='Small step size')
sns.distplot(posterior_large[1000:], label='Large step size');
_ = plt.legend();



In [15]:
from pymc3.stats import autocorr
lags = np.arange(1, 100)
fig, ax = plt.subplots()
ax.plot(lags, [autocorr(posterior_large, l) for l in lags], label='large step size')
ax.plot(lags, [autocorr(posterior_small, l) for l in lags], label='small step size')
ax.plot(lags, [autocorr(posterior, l) for l in lags], label='medium step size')
ax.legend(loc=0)
_ = ax.set(xlabel='lag', ylabel='autocorrelation', ylim=(-.1, 1))
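
`pymc3.stats.autocorr` may not exist in other PyMC3 versions (the stats module was later reorganized around ArviZ); the lag-$k$ sample autocorrelation is easy to compute directly if needed, e.g. with this small numpy equivalent:

def autocorr_np(x, lag):
    """Sample autocorrelation of x at the given lag."""
    x = x - x.mean()
    return np.dot(x[:-lag], x[lag:]) / np.dot(x, x)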



In [19]:
import pymc3 as pm

with pm.Model():
    mu = pm.Normal('mu', 0, 1)
    sigma = 1.
    returns = pm.Normal('returns', mu=mu, sd=sigma, observed=data)
    
    step = pm.Metropolis()
    trace = pm.sample(15000, step)


Multiprocess sampling (2 chains in 2 jobs)
Metropolis: [mu]
Sampling 2 chains: 100%|██████████| 31000/31000 [00:04<00:00, 7004.94draws/s]
The number of effective samples is smaller than 25% for some parameters.
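
That warning is the autocorrelation problem again: a Metropolis chain carries fewer independent draws than raw draws. In the PyMC3 version used here the matching diagnostic is `pm.effective_n(trace)` (renamed `pm.ess` in later releases):

pm.effective_n(trace)  # estimated number of independent draws for 'mu'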

In [20]:
plt.subplots(figsize=(10,6))
sns.distplot(trace[2000:]['mu'], label='PyMC3 sampler');
sns.distplot(posterior[500:], label='Hand-written sampler');
plt.legend();



In [21]:
with pm.Model():
    mu = pm.Normal('mu', 0, 1)
    sigma = 1.
    returns = pm.Normal('returns', mu=mu, sd=sigma, observed=data)
    
    step = pm.NUTS()
    trace2 = pm.sample(15000, step)


Multiprocess sampling (2 chains in 2 jobs)
NUTS: [mu]
Sampling 2 chains: 100%|██████████| 31000/31000 [00:08<00:00, 3611.19draws/s]

In [22]:
plt.subplots(figsize=(10,6))
sns.distplot(trace[2000:]['mu'], label='PyMC3 sampler');
sns.distplot(trace2[2000:]['mu'], label='PyMC3 NUTS');

sns.distplot(posterior[500:], label='Hand-written sampler');
plt.legend();



In [26]:
with pm.Model():
    mu = pm.Normal('mu', 0, 1)
    sigma = 1.
    returns = pm.Normal('returns', mu=mu, sd=sigma, observed=data)
    
    step = pm.NUTS()
    trace2 = pm.sample(1000, step)
    
plt.subplots(figsize=(10,6))
sns.distplot(trace[2000:]['mu'], label='PyMC3 sampler');
sns.distplot(trace2['mu'], label='PyMC3 NUTS');

sns.distplot(posterior[500:], label='Hand-written sampler');
plt.legend();


Multiprocess sampling (2 chains in 2 jobs)
NUTS: [mu]
Sampling 2 chains: 100%|██████████| 3000/3000 [00:01<00:00, 2741.83draws/s]

In [33]:
pm.plot_autocorr(trace2)
plt.suptitle('NUTS')


Out[33]:
Text(0.5, 0.98, 'NUTS')

In [34]:
pm.plot_autocorr(trace)
plt.suptitle('Metropolis')


Out[34]:
Text(0.5, 0.98, 'Metropolis')

In [35]:
pm.plot_autocorr(posterior)
plt.suptitle('Simple')


Out[35]:
Text(0.5, 0.98, 'Simple')

Finally, thin the chains to reduce autocorrelation and see how the posterior estimates compare.


In [30]:
plt.subplots(figsize=(10,6))
sns.distplot(trace[2000::15]['mu'], label='PyMC3 sampler');
sns.distplot(trace2['mu'][::5], label='PyMC3 NUTS');

sns.distplot(posterior[500::10], label='Hand-written sampler');
plt.legend()


Out[30]:
<matplotlib.legend.Legend at 0x12c2f1940>
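
Thinning discards draws; an alternative is to keep the full chain and report its effective sample size, roughly $N / (1 + 2\sum_k \rho_k)$ over the positive autocorrelations. A rough numpy estimate (a sketch using the simple truncate-at-first-negative-lag rule):

def effective_sample_size(chain, max_lag=200):
    """Rough ESS: N / (1 + 2 * sum of autocorrelations up to the
    first negative lag)."""
    x = chain - chain.mean()
    denom = np.dot(x, x)
    rho_sum = 0.0
    for k in range(1, max_lag):
        rho = np.dot(x[:-k], x[k:]) / denom
        if rho < 0:
            break
        rho_sum += rho
    return len(chain) / (1 + 2 * rho_sum)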

In [ ]: